% Start from a clean session: clear everything held in the workspace and
% close every open figure window before the script runs.
clear('all');
close('all');

%  =======================================================================================================================================================
% 
%  Code Description: 
%  This code file prepares a fund-month-level dataset on flow-driven and discretionary (i.e., non-flow-driven) asset purchases, 
%  based on the taxonomy of Jiang et al.
% 
%  =======================================================================================================================================================
% 
%  Major output:
%  - Fund-month-level dataset on flow-driven and discretionary (i.e., non-flow-driven) asset purchases (based on the taxonomy of Jiang et al.)
%  =======================================================================================================================================================
% 
%  General disclaimer:
%  This directory contains the replication code for "Connected Funds". 
%  Because we cannot share the underlying data provided by the Bundesbank's Research Data and Service Centre (RDSC) and other subscription data sources, 
%  we have included pseudo data to show how the raw data are formatted. 
%  Other researchers can go through a similar approval and subscription process to obtain the underlying data. (2023-04-06)
% 
%  =======================================================================================================================================================


%% Set project directory

% Root folder of the replication package. Every data and code location used
% in this script is built by appending to this string, so it must keep its
% trailing backslash.
projectPath = 'C:\ConnectedFunds_Codebase\';
addpath(projectPath)

% Add functions folders
addpath(strcat(projectPath, 'Code\matlab_functions'));

% Work from the code folder. The location is derived from projectPath so that
% relocating the project only requires changing the single assignment above.
cd(strcat(projectPath, 'Code\'))

% Set window state to get figures printed in full-screen
% (this changes a graphics-root default, so it applies to figures created afterwards)
set(groot, 'defaultFigureWindowState', 'maximized')

% Time stamp taken at script start
StartTotalTime = datetime;


%% Load data

% Asset portfolio changes: the MAT-file provides All_Results_DeltaHoldings.
% Its ISIN column is converted to a cell array of character vectors.
load([projectPath, 'Data\JiangEtAl\All_Results_DeltaHoldings.mat'])
All_Results_DeltaHoldings.ISIN = cellstr(All_Results_DeltaHoldings.ISIN);

% Cash holdings and net flows
% (presumably this MAT-file provides the table IFS_All used further below - confirm)
load([projectPath, 'Data\IFS\IFS_All_CleanData']);

% Monthly VIX data, read from CSV into a table
% (Source: https://www.cboe.com/tradable_products/vix/vix_historical_data/)
VIX = readtable([projectPath, 'Data\VIX\VIX_monthly.csv']);


%% Build data set (MainData)

% Distinct reporting dates from 200908 onwards, in ascending order
udates = unique(IFS_All.DATUM);
udates = udates(udates >= 200908);

% Keep only the fund-level variables that are used below
IFS_All = IFS_All(:, {'DATUM', 'ISIN', 'SPEZIAL', 'artmittel', 'FONDSVERM', 'BANKG', 'MITTELZUFL', 'MITTELABFL', 'AKRED', 'Return', 'USES_DERIVATIVES'});

% Lagged FONDSVERM, BANKG and AKRED.
% Every observation dated udates(k) is re-dated to udates(k + 1), so that a join on
% (DATUM, ISIN) attaches the value of the preceding date to each fund-date row.
% Observations dated udates(end) have no following date and are therefore not used.
% The whole lag table is built in one pass instead of being grown date by date.
% NOTE(review): "preceding date" means the previous date present in udates; this is the
% previous calendar month only if the date grid has no gaps - confirm for the real data.
[tmp_isLagSource, tmp_datePos] = ismember(IFS_All.DATUM, udates(1 : end - 1));

lag = IFS_All(tmp_isLagSource, {'DATUM', 'ISIN', 'FONDSVERM', 'BANKG', 'AKRED'});
lag.DATUM = udates(tmp_datePos(tmp_isLagSource) + 1);
lag.Properties.VariableNames = {'DATUM', 'ISIN', 'FONDSVERM_lag', 'BANKG_lag', 'AKRED_lag'};

% Left join: every row of IFS_All is kept; fund-dates without a lag observation get missing values
IFS_All = outerjoin(IFS_All, lag, 'Keys', {'DATUM', 'ISIN'}, 'MergeKeys', true, 'type', 'left');

clear dateNr lag tmp*

% Attach the fund-level variables (current and lagged) to the portfolio-change data.
% Left join: every row of All_Results_DeltaHoldings is kept.
MainData = outerjoin(All_Results_DeltaHoldings, IFS_All(:, {'DATUM', 'ISIN', 'SPEZIAL', 'artmittel', 'FONDSVERM', 'FONDSVERM_lag', 'BANKG', 'BANKG_lag', 'MITTELZUFL', 'MITTELABFL', 'AKRED', 'AKRED_lag', 'Return', 'USES_DERIVATIVES'}), 'Keys', {'DATUM', 'ISIN'}, 'MergeKeys', true, 'type', 'left');

% Use the same date key name as the fund data so that the tables can be joined on DATUM
VIX.Properties.VariableNames('datum') = {'DATUM'};

% Distinct VIX dates in ascending order
udates = unique(VIX.DATUM);

% Lagged VIX values.
% Every observation dated udates(k) is re-dated to udates(k + 1), so that a join on DATUM
% attaches the values of the preceding date to each row. Observations dated udates(end)
% have no following date and are not used. The lag table is built in one pass instead of
% being grown date by date.
% NOTE(review): "preceding date" means the previous date present in the VIX file; this is
% the previous calendar month only if the file has no gaps - confirm.
[tmp_isLagSource, tmp_datePos] = ismember(VIX.DATUM, udates(1 : end - 1));

lag = VIX(tmp_isLagSource, :);
lag.DATUM = udates(tmp_datePos(tmp_isLagSource) + 1);
lag.Properties.VariableNames('vixopen')  = {'vixopen_lag'};
lag.Properties.VariableNames('vixhigh')  = {'vixhigh_lag'};
lag.Properties.VariableNames('vixlow')   = {'vixlow_lag'};
lag.Properties.VariableNames('vixclose') = {'vixclose_lag'};

% Left join: every VIX row is kept; the first date has no lag and gets missing values
VIX = outerjoin(VIX, lag, 'Keys', {'DATUM'}, 'MergeKeys', true, 'type', 'left');

clear dateNr lag tmp*

% Attach current and lagged VIX values to every row of MainData with a matching date
MainData = outerjoin(MainData, VIX, 'Keys', {'DATUM'}, 'MergeKeys', true, 'type', 'left');

% Column layout: fund descriptors and FONDSVERM directly after ISIN, and each
% lagged variable directly after its contemporaneous counterpart.
MainData = movevars(MainData, {'SPEZIAL', 'artmittel', 'FONDSVERM'}, 'After', 'ISIN');
MainData = movevars(MainData, 'FONDSVERM_lag', 'After', 'FONDSVERM');
MainData = movevars(MainData, 'BANKG_lag', 'After', 'BANKG');
MainData = movevars(MainData, 'AKRED_lag', 'After', 'AKRED');

% Multiply all amount variables by 1000
% (presumably they are reported in thousands and are converted to units here - confirm)
tmp_scaledVars = {'FONDSVERM', 'FONDSVERM_lag', 'BANKG', 'BANKG_lag', 'AKRED', 'AKRED_lag', 'MITTELABFL', 'MITTELZUFL'};
for tmp_varNr = 1 : numel(tmp_scaledVars)
    MainData.(tmp_scaledVars{tmp_varNr}) = MainData.(tmp_scaledVars{tmp_varNr}) * 1000;
end
clear tmp_scaledVars tmp_varNr

% Derived variables (appended as the last columns, in this order):
% net flows = MITTELZUFL minus MITTELABFL, and the changes of BANKG and of the
% VIX close relative to their lagged values
MainData.NETFLOWS    = MainData.MITTELZUFL - MainData.MITTELABFL;
MainData.Delta_BANKG = MainData.BANKG - MainData.BANKG_lag;
MainData.Delta_VIX   = MainData.vixclose - MainData.vixclose_lag;

% Distinct dates present in the merged data set
udates = unique(MainData.DATUM);


%% Set up taxonomy following Jiang et al.
%
% Every fund-month is assigned to exactly one of six mutually exclusive cases, defined by
% the signs and the ordering of NETFLOWS and Delta_BANKG. In every case the two components
% add up to NETFLOWS - Delta_BANKG:
%
%   Case   Condition                        Purchase_FlowDriven       Purchase_Discretionary
%   1      Delta_BANKG <= NETFLOWS <  0     0                         NETFLOWS - Delta_BANKG
%   2      NETFLOWS <  Delta_BANKG <= 0     NETFLOWS - Delta_BANKG    0
%   3      NETFLOWS <  0 <  Delta_BANKG     NETFLOWS                  -Delta_BANKG
%   4      Delta_BANKG <  0 <= NETFLOWS     NETFLOWS                  -Delta_BANKG
%   5      0 <= Delta_BANKG <= NETFLOWS     NETFLOWS - Delta_BANKG    0
%   6      0 <= NETFLOWS <  Delta_BANKG     0                         NETFLOWS - Delta_BANKG
%
% Case 5 includes the tie NETFLOWS == Delta_BANKG >= 0 (both components equal to zero).
% With a strict inequality these rows would match no case and stay NaN, although the
% mirror-image tie with negative values is covered by Case 1 and also yields zero for both.
% Rows in which NETFLOWS or Delta_BANKG is NaN match no case and remain NaN.

tmp_netFlows  = MainData.NETFLOWS;
tmp_deltaCash = MainData.Delta_BANKG;
tmp_total     = tmp_netFlows - tmp_deltaCash;

MainData.Purchase_FlowDriven    = nan(height(MainData), 1);
MainData.Purchase_Discretionary = nan(height(MainData), 1);

% Case 1
tmp_case = tmp_deltaCash <= tmp_netFlows & tmp_netFlows < 0;
MainData.Purchase_FlowDriven(tmp_case)    = 0;
MainData.Purchase_Discretionary(tmp_case) = tmp_total(tmp_case);

% Case 2
tmp_case = tmp_netFlows < tmp_deltaCash & tmp_deltaCash <= 0;
MainData.Purchase_FlowDriven(tmp_case)    = tmp_total(tmp_case);
MainData.Purchase_Discretionary(tmp_case) = 0;

% Case 3
tmp_case = tmp_netFlows < 0 & 0 < tmp_deltaCash;
MainData.Purchase_FlowDriven(tmp_case)    = tmp_netFlows(tmp_case);
MainData.Purchase_Discretionary(tmp_case) = -tmp_deltaCash(tmp_case);

% Case 4
tmp_case = tmp_deltaCash < 0 & 0 <= tmp_netFlows;
MainData.Purchase_FlowDriven(tmp_case)    = tmp_netFlows(tmp_case);
MainData.Purchase_Discretionary(tmp_case) = -tmp_deltaCash(tmp_case);

% Case 5 (upper bound inclusive, see header)
tmp_case = 0 <= tmp_deltaCash & tmp_deltaCash <= tmp_netFlows;
MainData.Purchase_FlowDriven(tmp_case)    = tmp_total(tmp_case);
MainData.Purchase_Discretionary(tmp_case) = 0;

% Case 6
tmp_case = 0 <= tmp_netFlows & tmp_netFlows < tmp_deltaCash;
MainData.Purchase_FlowDriven(tmp_case)    = 0;
MainData.Purchase_Discretionary(tmp_case) = tmp_total(tmp_case);

clear tmp_netFlows tmp_deltaCash tmp_total tmp_case


%% Save data

% Export the complete fund-month data set as a CSV file into the project's Data folder
writetable(MainData, [projectPath, 'Data\MainData.csv']);